library(tidyverse)
library(ggsci) # scale_color_npg
library(tools) # file_path_sans_ext
library(phyloseq)
library(microbiome)
library(eulerr) # venn diagram
library(ggpubr) # as_ggplot
library(patchwork) # arrange multiple plots
library(vegan) # multi-response permutation procedure (MRPP) and adonis
library(hrbrthemes) # scale_y_percent
library(DESeq2)
library(ComplexHeatmap) # heatmap
# heatmaptree
library(ggtree)
library(treeio)
library(circlize) # colorRamp2
library(ggfortify) # pca
library(ggsignif) # geom_signif
# ecological network
library(SpiecEasi)
library(igraph)
library(rsvg) # read svg
library(grImport2) # read svg
library(grid)
library(furrr)
library(writexl) # write table s6
sourceDir <- function(path, trace = TRUE, ...) {
for (nm in list.files(path, pattern = "[.][RrSsQq]$")) {
if (!grepl("visualize_save_network", nm, fixed = TRUE)) {
if (trace) cat(nm,":")
source(file.path(path, nm), ...)
if (trace) cat("\n")
} else {
message("Do not source visualize_save_network_RCy3.R")
}
}
}
sourceDir("src/R/")
## compare_groups.R :
## ecological_network.R :
## generate_venn_diagram.R :
## get_unique_genus.R :
## plot_composition.R :
## plot_rarecurve.R :
To confirm the protective role of HA, the rectal temperature (Tre), a higher, more reliable and more stable method to measure core body temperature, was assessed and compared between HA and CR subjects. Supplementary Figure S1 shows that the Tre profile of HA subjects significantly increased in response to the first and second weeks of heat exposure followed by a slow and stable change to a Tre that was significantly decreased to the same level as before exposure.
tre <- read_tsv("data/raw/tre.tsv")
p_s1a <- ggplot(tre) +
stat_summary(
fun.y = "mean", aes(x = days, y = rt, color = state),
geom = "point", size = 3) +
stat_summary(fun.y = "mean", aes(x = days, y = rt, color = state),
geom = "line") +
labs(x = "Days", y = "Rectal temperature (°C)") +
guides(color = guide_legend(title = NULL)) +
# scale_x_date(date_breaks = "2 day", date_labels = "%m/%d") +
theme_bw() +
scale_color_manual(values = c("#4DBBD5FF", "#E64B35FF")) +
scale_x_continuous(
breaks = seq.int(0, 36, by = 2),
labels = seq.int(0, 36, by = 2)
) +
theme(legend.position = c(0.9, 0.8))
weight <- read_tsv("data/raw/weight.tsv")
p_s1b <- ggplot(weight) +
stat_summary(
fun.y = "mean", aes(x = days, y = weight, color = state),
geom = "point", size = 3) +
stat_summary(fun.y = "mean", aes(x = days, y = weight, color = state),
geom = "line") +
labs(x = "Days", y = "Body Weight (grams)") +
guides(color = guide_legend(title = NULL)) +
# scale_x_date(date_breaks = "2 day", date_labels = "%m/%d") +
theme_bw() +
scale_color_manual(values = c("#4DBBD5FF", "#E64B35FF")) +
scale_x_continuous(
breaks = seq.int(0, 35, by = 7),
labels = seq.int(0, 35, by = 7)
) +
ylim(200, 400) +
scale_y_continuous(
breaks = seq.int(200, 400, by = 50),
labels = seq.int(200, 400, by = 50)
) +
theme(legend.position = "none")
p_s1 <- p_s1a + p_s1b + plot_layout(nrow = 1, width = c(1, 1)) + plot_annotation(tag_levels = "A")
ggsave("output/figure/figure_s1.pdf", p_s1, width = 8,
height = 4)
p_s1
Supplementary Figure S1. Changes in rectal temperature during 28 days of heat exposure.
To assess the changes of the gut microbiota in HA, the gut microbiota was sequenced on 1 day before HA (day 0), and days 14 and 28 in both HA and CR subjects.
Quality filtering on the raw reads were performed under specific filtering conditions to obtain the high-quality clean reads according to the Cutadapt quality controlled process. Chimera sequences were identified and removed using UCHIME algorithm.
reads_sample_name <- c(
paste0("HA.", rep(c("28.", "14.", "0."), each = 8), c(1:4, 6:9)),
paste0("CR.", rep(c("28.", "14.", "0."), each = 8), c(1, 3:7, 9, 10))
)
reads_summary <- read_tsv("data/raw/reads_summary.txt") %>%
filter(`#Sample_name` %in% reads_sample_name) %>% # select 48 samples
mutate(`#Sample_name` = ifelse(str_detect(`#Sample_name`, "^HA"), # sample name transform
str_replace(`#Sample_name`, "^HA", "CR"),
str_replace(`#Sample_name`, "^CR", "HA")
))
# write_tsv(reads_summary, "data/raw/reads_summary_new.txt")
# total reads
total_reads <- sum(reads_summary$`Raw_reads(#)`)
total_reads
## [1] 4062872
# high quality reads
effective_reads <- sum(reads_summary$`Clean_Reads(#)`)
effective_reads
## [1] 3824443
# mean reads per sample
effective_reads / 48
## [1] 79675.9
# range of reads across samples
range(reads_summary$`Clean_Reads(#)`)
## [1] 60593 82017
A total of 4062872 reads were obtained which reduced to 3824443 reads after quality filtering (chimera) corresponding to a mean of 79675 reads per sample (ranging from 60593 to 82017).
A total of 1374 OTUs were identified which has a minimum count of 0.0005 of total reads across samples.
qiime_otu_table <- import_biom(
BIOMfilename = "data/raw/qiime/otu_table_mc2_w_tax_no_pynast_failures.biom",
treefilename = "data/raw/qiime/rep_set.tre"
)
# set the taxonomic ranks manually
colnames(tax_table(qiime_otu_table)) <- c(
"Kingdom", "Phylum", "Class", "Order",
"Family", "Genus", "Species"
)
mapping <- import_qiime_sample_data(mapfilename = "data/raw/mapping_new.txt")
ps_initial <- merge_phyloseq(qiime_otu_table, mapping)
# filter low confidence otu
# filter low confidence otu
otu_reads <- taxa_sums(ps_initial)
big_otu <- otu_reads[otu_reads/sum(otu_reads) > 0.00005]
ps_big <- prune_taxa(names(big_otu), ps_initial)
# 1374 OTUs
ntaxa(ps_big)
## [1] 1374
As we expected, no significant differences were found between CR and HA on day 0 by comparing the number of OTUs (1038 and 1071 OTUs; P = 0.6, Wilcoxon sum test; Supplementary Figure S2A, C). The number of OTUs was significant different between CR and HA on day 28 (1207 and 1279 OTUs, respectively; P = 0.014, Wilcoxon sum test) rather and day 14 (1313 and 1295 OTUs; P = 0.19, Wilcoxon sum test). (Supplementary Figure S2A, D, E).
The averaged coverage and subsampling were sufficient to describe gut bacterial communities according to sequencing-based rarefaction curves (Supplementary Figure S2B).
p_s2b <- plot_rarecurve(ps_big) + labs(y = "Number of OTUs")
# comparison of the number of otus
ps_meta <- meta(ps_big)
otus_number <- otu_table(ps_big)@.Data %>%
as_tibble() %>%
map_int(., ~ sum(.x > 0))
otu_number_df <- tibble(
sample = ps_meta$X.SampleID,
treatment = ps_meta$Treatment,
time = ps_meta$Time,
otus_number = otus_number)
p_s2a <- ggplot(otu_number_df, aes(treatment, otus_number, fill = treatment)) +
geom_boxplot() +
facet_wrap(~time, labeller = labeller(time = function(x) paste0("Day:", x))) +
theme_bw() +
ylab("Number of OTUs") +
xlab(NULL) +
theme(legend.position = "none") +
expand_limits(y = c(250, 1150)) +
stat_compare_means(comparisons = list(c("CR", "HA")), label = "p.signif", tip.length = 0, label.y = 1100) +
scale_fill_manual(values = c("#4DBBD5FF", "#E64B35FF"))
npg_colors_2 <- ggsci::pal_npg("nrc")(2) %>% rev()
ps_0 <- subset_samples(ps_big, Time == 0) %>%
prune_taxa(taxa_sums(.) > 0, .)
p_s2c <- generate_venn_diagram(ps_0) %>%
plot(
fills = list(fill = npg_colors_2, alpha = 0.6),
legend = list(side = "top", nrow = 1, ncol = 2),
labels = FALSE,
quantities = list(fontsize = 10)
)
ps_14 <- subset_samples(ps_big, Time == 14) %>%
prune_taxa(taxa_sums(.) > 0, .)
p_s2d <- generate_venn_diagram(ps_14) %>%
plot(
fills = list(fill = npg_colors_2, alpha = 0.6),
legend = FALSE,
labels = FALSE,
quantities = list(fontsize = 10)
)
ps_28 <- subset_samples(ps_big, Time == 28) %>%
prune_taxa(taxa_sums(.) > 0, .)
p_s2e <- generate_venn_diagram(ps_28) %>%
plot(
fills = list(fill = npg_colors_2, alpha = 0.6),
legend = FALSE,
labels = FALSE,
quantities = list(fontsize = 10)
)
p_s2 <- {p_s2a + p_s2b + plot_layout(nrow = 1, width = c(1.5, 1))} -
{as_ggplot(p_s2c) + as_ggplot(p_s2d) + as_ggplot(p_s2e) +
plot_layout(nrow = 1)} +
plot_layout(ncol = 1) + plot_annotation(tag_levels = "A")
ggsave("output/figure/figure_s2.pdf", p_s2, width = 10, height = 7)
p_s2
Supplementary Figure S2. Quality control of 16S rRNA V3-V4 reads. Number of OTUs (A) after quality filtering on day 0, 14 and 28. Wilcoxon test was used to compare CR and HA. (B) Rarefaction curves for all samples with X axis representing number of sequences and Y axis representing number of observed taxa. Venn diagram showing the number of OTUs exclusively identified in each group on day 0 (C), 14 (D) and day 28 (E).
No significant differences were found in microbiome diversity between CR and HA (Supplementary Figure S3)
alpha_diversity <- estimate_richness(ps_big)
alpha_diversity$sample <- row.names(alpha_diversity)
alpha_df <- full_join(ps_meta, alpha_diversity,
by = c("X.SampleID" = "sample"))
# day 0 附件
alpha_df_0 <- filter(alpha_df, grepl(0, X.SampleID)) %>%
select(one_of("Observed", "Shannon", "Simpson", "ACE", "Treatment")) %>%
gather("index", "value", -Treatment)
p_alpha_0 <- ggplot(alpha_df_0, aes(Treatment, value, fill = Treatment)) +
geom_boxplot() +
facet_wrap(~index, scales = "free_y", nrow = 1) +
theme_bw() +
theme(legend.position = "none") +
scale_fill_manual(values = npg_colors_2) +
stat_compare_means(comparisons = list(c("CR", "HA")),
label = "p.signif", tip.length = 0) +
# scale_x_discrete(labels = c("HA","control" = "CR")) +
labs(x = NULL, y = "Alpha Diversity Measure")
Anosim and multi-response permutation procedure (MRPP) anylysis between different groups on day 0
## anosim, MRPP, bray wunifrac
pvalue_0 <- compare_groups(ps_0)
pvalue_0
## $Anosim
## bray jaccard
## 0.037 0.034
##
## $MRPP
## bray jaccard
## 0.061 0.064
Beta diversity
min_reads_0 <- min(sample_sums(ps_0))
ps_ev_0 = transform_sample_counts(ps_0,
function(x) min_reads_0 * x/sum(x))
pcoa_bray_0 <- ordinate(ps_ev_0, method = "PCoA", distance = "bray")
p_beta_bray_0 <- plot_ordination(ps_ev_0, pcoa_bray_0,
axes = c(1, 2),
color = "Treatment") +
geom_point(size = 2) +
theme_bw() +
scale_color_manual(labels = c(control = "CR", heat = "HA"), values = npg_colors_2) +
theme(legend.title = element_blank(), legend.position = "none")
# annotate("text",
# label = "bold(Anosim)~~Bray~italic(P)==0.46~~Jaccard~italic(P)==0.49",
# x=0.25, y=-0.77, parse = TRUE, size = 3)
pcoa_jaccard_0 <- ordinate(ps_ev_0, method = "PCoA", distance = "jaccard")
p_beta_jaccard_0 <- plot_ordination(ps_ev_0, pcoa_jaccard_0,
axes = c(1, 2),
color = "Treatment") +
geom_point(size = 2) +
# stat_ellipse(level = 0.95) +
# geom_text(aes(label = X.SampleID), nudge_x = 0.1, nudge_y = 0.001) +
theme_bw() +
scale_color_manual(labels = c(control = "CR", heat = "HA"), values = npg_colors_2) +
theme(legend.title = element_blank())
# annotate("text",
# label = "bold(MRPP)~~Bray~italic(P)==0.377~~Jaccard~italic(P)==0.368",
# x=0, y=-0.78, parse = TRUE, size = 3)
p_s3 <- p_alpha_0 - {p_beta_bray_0 + p_beta_jaccard_0 + plot_layout(nrow = 1)} +
plot_layout(nrow = 2, heights = c(1.4, 1)) + plot_annotation(tag_levels = "A")
ggsave("output/figure/figure_s3.pdf", p_s3, width = 7, height = 6)
p_s3
Supplementary Figure S3. Diversity analysis on day 0. (A) Alpha diversity assessed by richness (ACE, Observed), diversity (Shannon, Simpson). Boxes represent the interquartile ranges, and the inside black plots represent the median and circles are outliers. P values are from Wilcoxon rank sum test. Beta diversity assessed by Principal coordinate analysis (PCoA) based on the Bray-Curtis (B) and Jaccard (C) distances. P values are from Wilcoxon rank sum test. P values: ns, no significance P > 0.05.
No significant differences were observed in alpha diversity indices on day 14 (Supplementary Figure S4 A, all P > 0.05, Wilcoxon sum test).
alpha_df_14 <- filter(alpha_df, grepl(14, X.SampleID)) %>%
select(one_of("Observed", "Shannon", "Simpson", "ACE", "Treatment")) %>%
gather("index", "value", -Treatment)
p_alpha_14 <- ggplot(alpha_df_14, aes(Treatment, value, fill = Treatment)) +
geom_boxplot() +
facet_wrap(~index, scales = "free_y", nrow = 1) +
theme_bw() +
theme(legend.position = "none") +
scale_fill_manual(values = npg_colors_2) +
stat_compare_means(comparisons = list(c("CR", "HA")),
label = "p.signif", tip.length = 0) +
# scale_x_discrete(labels = c("HA","control" = "CR")) +
labs(x = NULL, y = "Alpha Diversity Measure")
Anosim and multi-response permutation procedure (MRPP) anylysis between different groups on day 14
## anosim, MRPP, bray wunifrac
pvalue_14 <- compare_groups(ps_14)
pvalue_14
## $Anosim
## bray jaccard
## 0.530 0.554
##
## $MRPP
## bray jaccard
## 0.532 0.489
Beta diverisy
min_reads_14 <- min(sample_sums(ps_14))
ps_ev_14 = transform_sample_counts(ps_14,
function(x) min_reads_14 * x/sum(x))
pcoa_bray_14 <- ordinate(ps_ev_14, method = "PCoA", distance = "bray")
p_beta_bray_14 <- plot_ordination(ps_ev_14, pcoa_bray_14,
axes = c(1, 2),
color = "Treatment") +
geom_point(size = 2) +
theme_bw() +
scale_color_manual(labels = c(control = "CR", heat = "HA"), values = npg_colors_2) +
theme(legend.title = element_blank(), legend.position = "none")
# annotate("text",
# label = "bold(Anosim)~~Bray~italic(P)==0.46~~Jaccard~italic(P)==0.49",
# x=0.25, y=-0.77, parse = TRUE, size = 3)
pcoa_jaccard_14 <- ordinate(ps_ev_14, method = "PCoA", distance = "jaccard")
p_beta_jaccard_14 <- plot_ordination(ps_ev_14, pcoa_jaccard_14,
axes = c(1, 2),
color = "Treatment") +
geom_point(size = 2) +
# stat_ellipse(level = 0.95) +
# geom_text(aes(label = X.SampleID), nudge_x = 0.1, nudge_y = 0.001) +
theme_bw() +
scale_color_manual(labels = c(control = "CR", heat = "HA"), values = npg_colors_2) +
theme(legend.title = element_blank())
# annotate("text",
# label = "bold(MRPP)~~Bray~italic(P)==0.377~~Jaccard~italic(P)==0.368",
# x=0, y=-0.78, parse = TRUE, size = 3)
Alpha diversity showed a significant increase in the HA group compared to the control on day 28 (Figure 1A, all P < 0.05, Wilcoxon sum test).
alpha_df_28 <- filter(alpha_df, grepl(28, X.SampleID)) %>%
select(one_of("Observed", "Shannon", "Simpson", "ACE", "Treatment")) %>%
gather("index", "value", -Treatment)
p_alpha_28 <- ggplot(alpha_df_28, aes(Treatment, value, fill = Treatment)) +
geom_boxplot() +
facet_wrap(~index, scales = "free_y", nrow = 1) +
theme_bw() +
theme(legend.position = "none") +
scale_fill_manual(values = npg_colors_2) +
stat_compare_means(comparisons = list(c("CR", "HA")),
label = "p.signif", tip.length = 0) +
scale_x_discrete(labels = c("heat" = "HA","control" = "CR")) +
labs(x = NULL, y = "Alpha Diversity Measure")
HA group were clearly separated from control group on day 28 (all P < 0.05 by Anosim and multi-response permutation procedure (MRPP) analysis, for Bray and Jaccard distances, respectively) (Figure 1B, C).
## anosim, MRPP, bray wunifrac
pvalue_28 <- compare_groups(ps_28)
pvalue_28
## $Anosim
## bray jaccard
## 0.002 0.004
##
## $MRPP
## bray jaccard
## 0.003 0.004
min_reads <- min(sample_sums(ps_28))
ps_ev_28 = transform_sample_counts(ps_28, function(x) min_reads * x/sum(x))
pcoa_bray_28 <- ordinate(ps_ev_28, method = "PCoA", distance = "bray")
p_beta_bray_28 <- plot_ordination(ps_ev_28, pcoa_bray_28,
axes = c(1, 2),
color = "Treatment") +
geom_point(size = 2) +
stat_ellipse(level = 0.95) +
# geom_text(aes(label = X.SampleID), nudge_x = 0.1, nudge_y = 0.001) +
theme_bw() +
scale_color_manual(labels = c(control = "CR", heat = "HA"), values = npg_colors_2) +
theme(legend.title = element_blank(), legend.position = "none") +
annotate("text",
label = "bold(Anosim)~~Bray~italic(P)==0.003~~Jaccard~italic(P)==0.003",
x=-0.18, y=-0.77, parse = TRUE, size = 3)
# annotation_custom(ggplotGrob(p_pvalue_28), xmin = -0.75,
# xmax = -0.4, ymin = 0.4)
pcoa_jaccard_28 <- ordinate(ps_ev_28, method = "PCoA", distance = "jaccard")
p_beta_jaccard_28 <- plot_ordination(ps_ev_28, pcoa_jaccard_28,
axes = c(1, 2),
color = "Treatment") +
geom_point(size = 2) +
stat_ellipse(level = 0.95) +
# geom_text(aes(label = X.SampleID), nudge_x = 0.1, nudge_y = 0.001) +
theme_bw() +
scale_color_manual(labels = c(control = "CR", heat = "HA"), values = npg_colors_2) +
theme(legend.title = element_blank()) +
annotate("text",
label = "bold(MRPP)~~Bray~italic(P)==0.001~~Jaccard~italic(P)==0.005",
x=-0.2, y=-0.78, parse = TRUE, size = 3)
p_f1 <- p_alpha_28 + {p_beta_bray_28 + p_beta_jaccard_28 + plot_layout(ncol = 2)} +
plot_layout(nrow = 2, heights = c(1.3, 1)) + plot_annotation(tag_levels = "A")
ggsave("output/figure/figure_1.pdf", p_f1, width = 8, height = 7)
p_f1
Figure 1. Diversity analysis on day 28. (A) Alpha diversity assessed by richness (ACE, Observed), diversity (Shannon, Simpson). Boxes represent the interquartile ranges, and the inside black plots represent the median and circles are outliers. P values are from Wilcoxon rank sum test. Beta diversity assessed by Principal coordinate analysis (PCoA) based on the Bray-Curtis (B) and Jaccard (C) distances. Significant P-values of Anosim and multi-response permutation procedure (MRPP) between groups emphasize the differences in microbial community structure. P values: * P < 0.05, ** P < 0.01.
Phylum compostion of all samples
top_phylum <- aggregate_top_taxa(ps_big, 8, "Phylum") %>%
microbiome::transform("compositional")
p_composition <- plot_composition2(top_phylum) + scale_fill_npg() +
labs(x = NULL, y = "Relative abundance")
ggsave("output/figure/figure_2a.pdf", p_composition, width = 6, height = 6)
p_composition
Figure 2A. Relative abundance of bacterial at the phylum level.
Since population differences in the gut microbiota only observed between HA and CR on day 28, only subjects on day 28 were considered in the following analysis. To identify the taxa that were differentially represented in HA and CR subjects, we compared the relative abundances between these two groups at different taxonomic levels.
The dominant of the bacterial phyla identified in the fecal samples were encompassed by Firmicutes, Bacteroidetes, Proteobacteria, Cyanobacteria and Actinobacteria (Figure S4D).
There were subtle differences in bacterial community composition between HA and CR subjects (Supplementary Figure S4D)
p_mean_comosition <- subset_samples(top_phylum, Time == 28) %>%
plot_composition2(group = "group") +
# scale_fill_manual(values = color[[1]]) +
scale_x_discrete(labels = c("CR28" = "CR", "HA28" = "HA")) +
scale_y_percent() +
theme_bw() +
scale_fill_npg() +
labs(x = NULL, y = "Relative abundance") +
coord_flip() +
theme(legend.position = "bottom")
p_s4 <- p_alpha_14 +
{p_beta_bray_14 + p_beta_jaccard_14 + plot_layout(ncol = 2)} +
p_mean_comosition +
plot_layout(nrow = 3, heights = c(1.4, 1, 0.8)) +
plot_annotation(tag_levels = "A")
ggsave("output/figure/figure_s4.pdf", p_s4, width = 8, height = 10)
p_s4
Supplementary Figure S4. Diversity analysis on day 14. (A) Alpha diversity assessed by richness (ACE, Observed), diversity (Shannon, Simpson). Boxes represent the interquartile ranges, and the inside black plots represent the median and circles are outliers. P values are from Wilcoxon rank sum test. Beta diversity assessed by Principal coordinate analysis (PCoA) based on the Bray-Curtis (B) and Jaccard (C) distances. Significant P-values of Anosim and multi-response permutation procedure (MRPP) between groups emphasize the differences in microbial community structure. (D) Relative abundance of bacterial phyla. P values: ng, no significance P > 0.05.
However, no significant difference was found at phylum level (all P > 0.05, Wilcoxon sum test, Supplementary Table S1).
calculate_p <- function(abdlist) {
t <- wilcox.test(abdlist[[1]], abdlist[[2]], paired = FALSE)
return(t$p.value)
}
phylum_28 <- aggregate_taxa(ps_28, "Phylum") %>%
microbiome::transform("compositional") %>%
psmelt()
phylum_p <- group_by(phylum_28, Phylum, Treatment) %>%
do(abd = .$Abundance) %>%
group_by(Phylum) %>%
do(abdlist = .$abd) %>%
mutate(p_value = calculate_p(abdlist))
phylum_p$q <- p.adjust(phylum_p$p_value, "BH")
mean_phylum <- group_by(phylum_28, Phylum, Treatment) %>%
summarise(mean = mean(Abundance)) %>%
spread(Treatment, mean)
diff_phylum <- inner_join(phylum_p, mean_phylum) %>%
arrange(q) %>%
mutate(`p-value` = round(p_value, 4), FDR = round(q, 4),
CR = round(CR, 4), HA = round(HA, 4)) %>%
select(Phylum, `p-value`, FDR, CR, HA)
write_csv(diff_phylum, "output/table/table_s1.csv")
diff_phylum
Genus level analysis showed that there were four significantly differential genus in HA subjects compared to CR subjects, including Blautia, Oscillospira, Lactobacillus, Allobaculum, lactobacillus, a common probiotic, was significantly increased in HA subjects (Supplementary Table S2).
genus <- aggregate_taxa(ps_28, "Genus") %>%
format_unique_genus() %>%
microbiome::transform("compositional") %>%
psmelt()
genus_p <- group_by(genus, Genus, Treatment) %>%
do(abd = .$Abundance) %>%
group_by(Genus) %>%
do(abdlist = .$abd) %>%
mutate(p_value = calculate_p(abdlist))
genus_p$fdr <- p.adjust(genus_p$p_value, "BH")
mean_genus <- group_by(genus, Genus, Treatment) %>%
summarise(mean = mean(Abundance)) %>%
spread(Treatment, mean)
diff_genus <- inner_join(genus_p, mean_genus) %>%
arrange(fdr) %>%
mutate(`p-value` = round(p_value, 4), FDR = round(fdr, 4),
CR = round(CR, 4), HA = round(HA, 4)) %>%
select(Genus, `p-value`, FDR, CR, HA)
write_csv(diff_genus, "output/table/table_s2.csv")
diff_genus
差异属的比值çƒå›¾
sig_diff_genus <- filter(genus_p,
Genus %in% paste0("g__", c("Lactobacillus", "Oscillospira", "Blautia",
"Allobaculum", "Prevotella"))
)
# sig_diff_genus <- filter(genus_p, p_value < 0.05)
sig_genus <- bind_cols(map(sig_diff_genus$abdlist, unlist))
names(sig_genus) <- as.character(sig_diff_genus$Genus)
row.names(sig_genus) <- c(paste("C_", 1:8), paste("H_", 1:8))
sig_genus2 <- t(sig_genus)
sig_mean_c <- sig_genus2[, 1:8] %>% rowMeans()
sig_mean_h <- sig_genus2[, 9:16] %>% rowMeans()
sig_genus_fc1 <- sweep(sig_genus2[, 1:8], 1, sig_mean_h, FUN = "/")
sig_genus_fc2 <- sweep(sig_genus2[, 9:16], 1, sig_mean_c, FUN = "/")
sig_genus_fc <- cbind(sig_genus_fc1, sig_genus_fc2)
Heatmap(sig_genus_fc %>% log2,
col = c("steelblue", "#EEEEEE", "red"),
#col = circlize::colorRamp2(c(-5, 0, 5), c("blue", "white", "red")),
heatmap_legend_param = list(
title = "Z score",
title_position = "lefttop-rot"
))
The genera abundance distribution was quite discrepant to that of CRs (Figure 2C).
# relative abundance
otus_28 <- tax_glom(ps_ev_28, "Genus") %>%
otu_table()
otus_28 <- otus_28@.Data
d <- vegdist(otus_28)
cluster <- hclust(d)
tree <- treeio::as.phylo(cluster)
# genus info
tree_info <- tax_glom(ps_ev_28, "Genus") %>% tax_table()
tree_info <- tree_info@.Data
tree_info_df <- data.frame(id = row.names(tree_info), tree_info)
p_composition_level <- levels(p_composition$data$Phylum)
tree_info_df$Phylum <- factor(tree_info_df$Phylum,
levels = p_composition_level[-1])
# p <- ggtree(tree, layout = "circular", branch.length = "none")
p_tree_phylum <- ggtree(tree, layout = "circular", branch.length = "none") %<+%
tree_info_df +
geom_tippoint(aes(color=Phylum)) +
scale_color_npg() +
theme(legend.position = "none")
# z score of genus
heatmap_data <- microbiome::transform(tax_glom(ps_ev_28, "Genus"), transform = "Z")
ht <- otu_table(heatmap_data)@.Data
exp_group <- ifelse(grepl("^C", colnames(ht)), TRUE, FALSE)
cr_zmean <- apply(ht[, exp_group], 1, mean)
ha_zmean <- apply(ht[, !exp_group], 1, mean)
nm <- row.names(ht)
ht_df <- data.frame(CR = cr_zmean, HA = ha_zmean)
p_heatmap_tree <- gheatmap(p_tree_phylum, ht_df, color = NULL, font.size = 2.5,
colnames_position = "top",
hjust = 0.4, width = 0.3) +
scale_fill_gradient2(low="steelblue", mid = "#EEEEEE", high = "red",
breaks = c(-1, -0.5, 0, 0.5, 1), labels = c(-1, -0.5, 0, 0.5, 1)) +
guides(color = FALSE) +
# theme(legend.title = element_text(vjust = 0)) +
guides(fill = guide_colorbar(
title = "Z score",
title.position = "left",
# direction = "horizontal",
barheight = 0.8
# barwidth = 0.8
)) +
theme(legend.direction = "horizontal",
legend.position = c(0.5, 0.02),
legend.title = element_text(vjust = 1))
ggsave("output/figure/figure_2b.pdf", p_heatmap_tree, width = 6, height = 6, units = "in")
p_heatmap_tree
Figure 2C. Heatmap tree shows genera significantly different in HA compare to those in CR group, and their phylogenic relationships on day 28. The Abundance profiles are expressed by z-scores.
To explore more precisely the taxa that were driving the differentiation of the microbiota of the groups, we performed differential analysis based on DESeq2 (Love et al., 2014), a method using negative binomial GLM to obtain maximum likelihood estimates between two conditions. Then the RA of taxa, which showed false discovery rate (FDR)-corrected P value < 0.05 with differential expression analysis conducted on whole microbiota profile, were expressed as heat map (Figure 2B, Supplementary Table S3).
diagdds = phyloseq_to_deseq2(ps_28, ~ Treatment)
gm_mean = function(x, na.rm=TRUE){
exp(sum(log(x[x > 0]), na.rm = na.rm) / length(x))
}
geoMeans = apply(counts(diagdds), 1, gm_mean)
diagdds = estimateSizeFactors(diagdds, geoMeans = geoMeans)
diagdds = DESeq(diagdds, fitType="local")
res = results(diagdds)
res = res[order(abs(res$padj), na.last=NA), ]
# res_fc = res[order(res$log2FoldChange, na.last=NA, decreasing = TRUE), ]
alpha = 0.05
sigtab = res[(res$padj < alpha), ]
# sigtab_fc = res_fc[(res_fc$padj < alpha), ]
sigtab = cbind(as(sigtab, "data.frame"),
as(tax_table(ps_28)[rownames(sigtab), ], "matrix"))
diff_otu <- row.names(sigtab)
otus <- otu_table(microbiome::transform(ps_28, "log10p"))
diff_otus <- otus[row.names(otus) %in% diff_otu[1:40], ]
diff_otus <- diff_otus@.Data
taxas <- tax_table(ps_28)@.Data
diff_taxas <- taxas[row.names(taxas) %in% row.names(diff_otus), ]
row.names(diff_otus) <- paste(diff_taxas[, "Phylum"],
diff_taxas[, "Family"], ";")
# pheatmap::pheatmap(diff_otus, annotation_col = sample_col, cutree_cols = 2)
p_heatmap <- Heatmap(diff_otus@.Data, column_split = 2,
column_title = NULL,
col = colorRamp2(c(0,2,4), c("steelblue", "#EEEEEE", "red")),
column_names_gp = gpar(fontsize = 9),
row_names_gp = gpar(fontsize = 9),
bottom_annotation = HeatmapAnnotation(
fill = anno_block(gp = gpar(fill =c("grey24")),
labels = c("CR", "HA"),
labels_gp = gpar(col = "white", fontface = "bold"))),
heatmap_legend_param = list(
x = unit(1, "cm"), y = unit(1, "cm"),
title = "Log Abundance",
title_position = "leftcenter-rot",
legend_height = unit(6, "cm")),
)
p_heatmap <- as_ggplot(grid.grabExpr(draw(p_heatmap)))
ggsave("output/figure/figure_2c.pdf", p_heatmap, width = 8, height = 12)
p_heatmap
Figure 2B. Hierarchical clustering with a heat map shows the RA of representative OTUs (those with greatest difference between HA and CR on day 28) group means from each OTU selected for P < 0.05, obtained with differential abundance analysis with DESeq2. The OTUs are shown as phylum and family.
st3 <- mutate(sigtab,
baseMean = round(baseMean, 4),
log2FoldChange = round(log2FoldChange, 4),
lfcSE = round(lfcSE, 4),
stat = round(stat, 4),
pvalue = formatC(pvalue, format = "e", digits = 4),
padj = formatC(padj, format = "e", digits = 4)
) %>%
select(Genus, baseMean:padj)
st3$OTU <- row.names(sigtab)
st3 <- select(st3, OTU, Genus, baseMean, log2FoldChange, pvalue, padj)
write_tsv(st3, "output/table/figure_s3.tsv")
st3
The following analysis inlcuding lefse OTU biomarker (using lefse) and functional prediction (usring picrust, lefse, and bugbase), is based on the taxa tables from qiime. Thus, we should convert the qiime taxa tables to the file that is compatible with lefse, picrust and bugbase.
You should downloaded greegene database from ftp://greengenes.microbio.me/greengenes_release/gg_13_5/gg_13_5_otus.tar.gz before run the folling script, unzip it and put file 97_otus.fasta under directory data/raw.
sh src/shell/qiime_to_lefse_picrust_bugbase.sh
Differentially abundant taxa (Supplementary Table S4) were further confirmed by LEfSe. Fist, generate the input file for lefse
qiime_lefse <- read_tsv("data/processed/summaryize_taxa_L6/mapping_new_L6.txt") %>%
select(Treatment,
`Unassigned|Other`:`k__Bacteria|p__Verrucomicrobia|c__Verrucomicrobiae|o__Verrucomicrobiales|f__Verrucomicrobiaceae|g__Akkermansia`) %>%
t() %>%
as.data.frame() %>%
rownames_to_column('otu') %>%
filter(!grepl("(Other)|(__)$", otu)) # åˆ é™¤æœªç¡®å®šçš„taxa
write_tsv(qiime_lefse, "data/processed/lefse_qiime/lefse_otu_input.txt", col_names = FALSE)
lefse analysis
format_input.py data/processed/lefse_qiime/lefse_otu_input.txt \
output/lefse_qiime/lefse_qiime.in -c 1 -o 1000000
run_lefse.py output/lefse_qiime/lefse_qiime.in output/lefse_qiime/lefse_qiime_out.res
plot_cladogram.py output/lefse_qiime/lefse_qiime_out.res output/lefse_qiime/figure_3.pdf \
--dpi 300 --format pdf --title="" --right_space_prop 0.3 --label_font_size 8
include_graphics("output/lefse_qiime/figure_3.pdf")
lefse_otu_marker <-
read_tsv(
"output/lefse_qiime/lefse_qiime_out.res",
col_names = c("otu", "mean_abd", "enrich_group", "lda", "p")
) %>%
filter(!is.na(enrich_group)) %>%
mutate(lda = round(lda, 4),
p = round(as.numeric(p), 4)) %>%
select(otu, enrich_group, lda, p) %>%
arrange(desc(enrich_group), desc(lda))
write_tsv(lefse_otu_marker, "output/lefse_qiime/table_s4.tsv")
lefse_enrich <-
read_tsv(
"output/lefse_qiime/lefse_qiime_out.res",
col_names = c("otu", "mean_abd", "enrich_group", "lda", "p")
) %>%
filter(!is.na(enrich_group))
write_tsv(lefse_enrich, "output/lefse_qiime/lefse_enriched.tsv",
col_names = FALSE)
lefse_otu_marker
To infer the functional alternations of gut microbiota in HA, we employed PICRUSt (Langille et al., 2013) algorithm to predict the functional composition profiles from 16S rRNA gene-based microbial compositions.
# Normalize OTU Table
normalize_by_copy_number.py -i data/processed/picrust.biom \
-o output/picrust_kegg/normaolized_otus.biom
# Predict Functions For Metagenome
predict_metagenomes.py -i output/picrust_kegg/normaolized_otus.biom \
-o output/picrust_kegg/metagenome_predictions.biom
# convert to tab for pca
biom convert -i output/picrust_kegg/metagenome_predictions.biom \
-o output/picrust_kegg/metagenome_predictions.txt --to-tsv
Overall, the microbial communities present in HA and CR could be distinguished based on their functions (Figure S4A).
picrust_ko <- read_tsv(
"output/picrust_kegg/metagenome_predictions.txt",
skip = 1
)
pca_in <- select(picrust_ko, -`#OTU ID`) %>%
t()
subjects_name <- row.names(pca_in)
pca_color <- tibble(name = subjects_name,
color = ifelse(grepl("^HA", subjects_name), "HA", "CR"))
p_f4a <- autoplot(prcomp(pca_in),
data = pca_color, colour = "color") +
theme_bw() +
scale_color_manual(values = npg_colors_2) +
theme(legend.position = "none")
Based on this, we then performed kegg pathway enrichment analysis using lefse.The thousands of predicted functions should be collapsed into higher categories (kegg pathway).
categorize_by_function.py -f -i output/picrust_kegg/metagenome_predictions.biom \
-c KEGG_Pathways -l 2 -o output/picrust_kegg/kegg_pathway_l2.txt
Format the kegg pathway to the file that is compatible with lefse.
picrust_kegg <- read_tsv("output/picrust_kegg/kegg_pathway_l2.txt", skip = 1)
picrust_kegg$pathway <- str_replace_all(picrust_kegg$`#OTU ID`, " ", "_")
picrust_kegg$`#OTU ID` <- NULL
picrust_kegg$KEGG_Pathways <- NULL
picrust_kegg <- select(picrust_kegg, pathway, everything())
picrust_kegg <- select(picrust_kegg, pathway, everything())
lefse_class <- c("class",
ifelse(grepl("^C", names(picrust_kegg)[-1]), "CR", "HA")) %>%
matrix( ncol = ncol(picrust_kegg)) %>%
as.data.frame(stringsAsFactors = FALSE) %>%
set_names(names(picrust_kegg))
picrust_kegg <- rbind(lefse_class, picrust_kegg)
write_tsv(picrust_kegg, "output/picrust_kegg/lefse_kegg_in.txt", col_names = FALSE)
The predicted KEGG pathway significantly enriched in HA included cell motility, signal transduction, lipid metabolism, metabolism of other amino acids, neurodegenerative diseases, environmental adaption and transport and catabolism (Figure 4C, Supplementary Table S5).
format_input.py output/picrust_kegg/lefse_kegg_in.txt \
output/picrust_kegg/lefse_kegg.in -c 1 -o 1000000
run_lefse.py output/picrust_kegg/lefse_kegg.in \
output/picrust_kegg/lefse_kegg_out.res
lefse_pathway <- read_tsv(
"output/picrust_kegg/lefse_kegg_out.res",
col_names = c("pathway", "mean_abd", "enrich_group", "lda", "p")
)
enrich_pathway <- filter(lefse_pathway, !is.na(enrich_group))
# pathway ordered according to lda in different groups
pathway_order <- arrange(enrich_pathway,
desc(enrich_group), desc(lda))$pathway
p_f4c <- ggplot(enrich_pathway, aes(pathway, lda)) +
geom_segment(aes(xend = pathway, yend = 0, colour = enrich_group), size = 8) +
theme_bw() +
scale_color_manual(values = npg_colors_2) +
labs(y = "LDA SCORE (log10)", x = "KEGG pathway") +
guides(colour = guide_legend(title = NULL)) +
theme(legend.position = c(0.8, 0.87), legend.direction = "horizontal",
axis.text.x = element_text(angle = -90, hjust = 0)) +
scale_x_discrete(limits = pathway_order) +
scale_y_continuous(expand = c(0, 0))
pathway enrichment analysis using lefse
lefse_pathway_marker <-
read_tsv(
"output/picrust_kegg/lefse_kegg_out.res",
col_names = c("otu", "mean_abd", "enrich_group", "lda", "p")
) %>%
filter(!is.na(enrich_group)) %>%
mutate(lda = round(lda, 4),
p = round(as.numeric(p), 4)) %>%
select(otu, enrich_group, lda, p) %>%
arrange(desc(enrich_group), desc(lda))
write_tsv(lefse_pathway_marker, "output/picrust_kegg//table_s5.tsv")
lefse_pathway_marker
We predicted nine potential phenotypes, including aerobic, anaerobic, containing mobile elements, facultatively anaerobic, biofilm forming, Gram negative, Gram positive, potentially pathogenic, and stress tolerant.
Generate the bugbase mapping file
bugbase_mapping <- meta(ps_28) %>% mutate(`#SampleID` = X.SampleID)
bugbase_mapping <- select(bugbase_mapping, `#SampleID`, Treatment)
write_tsv(bugbase_mapping, "output/bugbase/bugbase_mapping.txt")
Run bugbase
run.bugbase.r -i data/processed/bugbase_json.biom \
-m output/bugbase/bugbase_mapping.txt -c Treatment \
-o output/bugbase/bugbase_output_class_T0.001 -T 0.001 -t 3
Among all the phenotypes, the relative abundance of the Aerobic was significantly increased and the stress tolerant was significantly decreased after HA (Figure 4B).
bugbase_predictions <- read_tsv(
"output/bugbase/bugbase_output_class_T0.001/predicted_phenotypes/predictions.txt"
)
mappings <- read_tsv("output/bugbase/bugbase_mapping.txt")
phynotypes <- c(
"Aerobic",
"Anaerobic",
"Contains_Mobile_Elements",
"Facultatively_Anaerobic",
"Forms_Biofilms",
"Gram_Negative",
"Gram_Positive",
"Potentially_Pathogenic",
"Stress_Tolerant"
)
extract_bugbase_p <- function(file) {
bugbase_stat <- read_tsv(file, col_names = FALSE)
return(as.numeric(bugbase_stat[nrow(bugbase_stat), 1]))
}
files <- paste0("output/bugbase/bugbase_output_class_T0.001/predicted_phenotypes/",
phynotypes, "_stats.txt")
phynotypes_p <- map_dbl(files, extract_bugbase_p)
names(phynotypes_p) <- phynotypes
signf_phenotype <- phynotypes_p[phynotypes_p <= 0.05]
bugbase_signf <- bugbase_predictions[c("X1", names(signf_phenotype))]
bugbase_signf <- full_join(mappings, bugbase_signf,
by = c("#SampleID" = "X1")) %>%
gather("phenotype", "value", Aerobic:Stress_Tolerant)
# different limits in different facet
my_limits_df <- data.frame(Treatment = NA,
phenotype = rep(names(signf_phenotype), each =2),
value = c(0, 0.15, 0.98, 1.001))
my_pvalue_df <- data.frame(
Treatment = NA,
phenotype = c(names(signf_phenotype)),
start = rep("CR",2), end = rep("HA", 2),
y = c(0.085, 1.0005),
label = rep("*", 2),
tip = c(0.002, 0.01)
)
p_bugbase <- ggplot(bugbase_signf, aes(x = Treatment, y = value, fill = Treatment)) +
geom_boxplot() +
geom_signif(
data = my_pvalue_df,
aes(xmin = start, xmax = end, annotations = label, y_position = y, tip_length = tip),
textsize = 3, vjust = -0.2, manual = TRUE
) +
geom_blank(data = my_limits_df, aes(Treatment, value)) +
facet_wrap(~phenotype, scales = "free_y") +
scale_fill_manual(values = npg_colors_2) +
theme_bw() +
labs(x = NULL, y = "Relative abundance") +
theme(legend.position = "none")
# ggsave("output/bugbase/figure_4.pdf", p_bugbase, width = 5, height = 4)
# p_bugbase
#
# {p_s5a + p_bugbase + plot_layout(ncol = 2)} + p_s5b
p_f4 <- (p_f4a + p_bugbase + plot_layout(nrow = 1, widths = c(0.8, 1))) -
p_f4c + plot_layout(ncol = 1, heights = c(1, 1)) +
plot_annotation(tag_levels = "A")
ggsave("output/figure/figure_4.pdf", p_f4, width = 6.5, height = 8)
p_f4
Figure 4. Functional analysis on day 28. (A) Principal component analysis (PCA) plot comparing the KEGG ontology (KO) between HA and CR subjects. KO was predicted using PICRUSt. (B) Phenotypic prediction using BugBase. Among all the phenotypes, the relative abundance of the aerobic was significantly increased and the stress tolerant was significantly decreased after HA subjects. Statistical significance was determined by Wilcoxon rank sum tests with FDR correction. P value: * P < 0.05. (C) Predicted KEGG Pathways differentially abundant between HA and CR subjects.
# mapping <- read_tsv("data/raw/mapping.txt")
# selected_reads_sample_name <- mapping$InputFileName %>%
# tools::file_path_sans_ext()
# selected_reads_file_name <- paste0(selected_reads_sample_name, ".fna")
#
# reads_path <- "/P101SC18030731-01-B8-3_result/01.CleanData"
# # reads_sample_name <- list.dirs(reads_path, full.names = FALSE, recursive = FALSE)
# selected_reads_file <- file.path(
# reads_path,
# selected_reads_sample_name,
# selected_reads_file_name
# )
# reads_file_to <- file.path(
# "data/raw/reads",
# map_chr(
# selected_reads_file_name,
# ~ ifelse(str_detect(.x, "^HA"), # sample name transform
# str_replace(.x, "^HA", "CR"),
# str_replace(.x, "^CR", "HA")
# )
# )
# )
# # file.copy(selected_reads_file, reads_file_to)
# mapping$InputFileName <- basename(reads_file_to)
# # write_tsv(mapping, "data/raw/mapping_new.txt")
To examine the changes on HA microbial community structure and how HA affects the microbial interactions, we utilized SPIEC-EASI33 to infer two microbial ecological networks of HA and CR subjects. In general, there were more positive correlations than negative correlations in both ecological networks.
# To minimize the interference of low confidence OTUs, OTUs that were less than 100 reads over all samples or present in less 30% of the samples were filter out.
ps_28 <- subset_samples(ps_big, Time == 28)
otu <- otu_table(ps_28)
otu <- otu[rowSums(otu > 0) > 5, ]
otu <- otu[rowSums(otu) > 100, ]
otu_table(ps_28) <- otu
ps_ha28 <- subset_samples(ps_28, Treatment == "HA")
ps_cr28 <- subset_samples(ps_28, Treatment == "CR")
cor_ha28 <- spiec.easi(ps_ha28, method='mb', lambda.min.ratio=1e-2, nlambda=10,
icov.select.params=list(rep.num=100, ncores=2))
net_ha28 <- make_net(ps_ha28, cor_ha28)
cor_cr28 <- spiec.easi(ps_cr28, method='mb', lambda.min.ratio=1e-2, nlambda=10,
icov.select.params=list(rep.num=100, ncores=2))
net_cr28 <- make_net(ps_cr28, cor_cr28)
The degree distributions of the vertices were similar in the two networks (Figure 5C).
dd_ha28 <- degree_distribution(net_ha28, cumulative = FALSE)
dd_cr28 <- degree_distribution(net_cr28, cumulative = FALSE)
n_dd_ha28 <- length(dd_ha28)
n_dd_cr28 <- length(dd_cr28)
dd_tibble <- tibble(
x = c(0:(n_dd_ha28 - 1), 0:(n_dd_cr28 - 1)),
y = c(dd_ha28, dd_cr28),
state = rep(c("HA", "CR"), c(n_dd_ha28, n_dd_cr28))
)
p_dd <- ggplot(dd_tibble, aes(x, y, color = state)) +
geom_point() +
geom_line() +
theme_bw() +
scale_color_manual(values = npg_colors_2) +
labs(x = "Degree", y = "Frequency") +
theme(legend.title = element_blank(), legend.position = c(0.87, 0.87))
We then estimated the natural connectivity, a general metric to measure robustness of network against attacks by sequentially removing hubs (nodes with high degree centrality). The HA network was more robust than the CR network by removing high degree nodes. (Fig. 5D)
nc_ha28_degree <- nc_attack(net_ha28,method = "degree")
##
Progress: ───────────────────────────────────────────────────────────────────────────────── 100%
Progress: ───────────────────────────────────────────────────────────────────────────────── 100%
Progress: ─────────────────────────────────────────────────────────────────────────────────── 100%
Progress: ───────────────────────────────────────────────────────────────────────────────────── 100%
Progress: ────────────────────────────────────────────────────────────────────────────────────── 100%
nc_cr28_degree <- nc_attack(net_cr28, method = "degree")
##
Progress: ─────────────────────────────────────────────────────────────────────────────────── 100%
Progress: ───────────────────────────────────────────────────────────────────────────────────── 100%
Progress: ────────────────────────────────────────────────────────────────────────────────────── 100%
nc_tibble <- bind_rows(
make_nc_tibble(nc_ha28_degree, "HA", "degree"),
make_nc_tibble(nc_cr28_degree, "CR", "degree"),
)
p_nc <- ggplot(nc_tibble, aes(x, y, color = state)) +
geom_line() +
theme_bw() +
scale_color_manual(values = npg_colors_2) +
theme(legend.position = c(0.87, 0.87), legend.title = element_blank()) +
labs(x = "Percentage of removed nodes", y = "Natural connectivity")
In general, there were more positive correlations than negative correlations in both ecological networks (Fig. 5A,B, Supplementary Table S6).
# import svg
rsvg_svg("data/net_cytoscape/ha28.svg", "data/net_cytoscape/ha28_cairo.svg")
rsvg_svg("data/net_cytoscape/cr28.svg", "data/net_cytoscape/cr28_cairo.svg")
ha_picture <- grImport2::readPicture("data/net_cytoscape/ha28_cairo.svg")
cr_picture <- grImport2::readPicture("data/net_cytoscape/cr28_cairo.svg")
# legend
cols <- ggsci::pal_npg(palette = c("nrc"), alpha = 1)(10) %>%
stringr::str_sub(1, 7) %>%
rev
phylums <- unique(vertex_attr(net_ha28, "Phylum"))
phylums <- c(setdiff(phylums, "Unclassified"), "Unclassified")
lgd <- ComplexHeatmap::Legend(at = sort(phylums), title = "Phylum",
title_position = "leftcenter",
legend_gp = gpar(fill = cols),
direction = "horizontal",
nrow = 2
)
# generate figure 5
generate_figure5 <- function(ha_picture, cr_picture, legend, p_dd, p_nc) {
tag_text <- textGrob("A", gp = gpar(cex = 1.2))
grid.newpage()
g_lay <- grid.layout(5,4,
widths = unit.c(
unit(1.5, "grobwidth", tag_text), unit(1, "null"),
unit(1.5, "grobwidth", tag_text), unit(1, "null")),
heights = unit.c(
unit(1.5, "grobheight", tag_text),
unit(1, "null"),
unit(5, "grobheight", tag_text),
unit(1.5, "grobheight", tag_text),
unit(1, "null")
)
)
pushViewport(viewport(layout = g_lay))
pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 1, name = "tag_1"))
grid.text("A", gp = gpar(cex = 1.2))
upViewport()
pushViewport(viewport(layout.pos.row = 2, layout.pos.col = c(1,2), name = "ha_net"))
grImport2::grid.picture(ha_picture, expansion = 0, ext = "clipbbox", width = unit(1, "npc"))
upViewport()
pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 3, name = "tag_2"))
grid.text("B", gp = gpar(cex = 1.2))
upViewport()
pushViewport(viewport(layout.pos.row = 2, layout.pos.col = c(3,4), name = "cr_net"))
grImport2::grid.picture(cr_picture, expansion = 0, ext = "clipbbox", width = unit(1, "npc"))
upViewport()
pushViewport(viewport(layout.pos.row = 3, name = "legend"))
draw(lgd)
upViewport()
pushViewport(viewport(layout.pos.row = 4, layout.pos.col = 1, name = "tag_3"))
grid.text("C", gp = gpar(cex = 1.2))
upViewport()
pushViewport(viewport(layout.pos.row = 4, layout.pos.col = 3, name = "tag_4"))
grid.text("D", gp = gpar(cex = 1.2))
upViewport()
pushViewport(viewport(layout.pos.row = 5, layout.pos.col = c(1,2), name = "degree"))
grid.draw(ggplotGrob(p_dd))
upViewport()
pushViewport(viewport(layout.pos.row = 5, layout.pos.col = c(3,4), name = "robust"))
grid.draw(ggplotGrob(p_nc))
}
generate_figure5(ha_picture, cr_picture, lgd, p_dd, p_nc)
Figure 5. Ecological networks inferred using SPIEC-EASI in HA (A) and CR (B) group. Nodes represents OTUs and colored by its phylum; red edges represent positive correlations and green edges represent negative correlations. (C) Degree distributions of the two networks are similar. (D) Natural Connectivity was used to measure the robustness of networks by sequentially removing high degree nodes. The result showed that HA network was more robust than CR network.
pdf("output/figure/figure_f5.pdf", height = 8, width = 8)
generate_figure5(ha_picture, cr_picture, lgd, p_dd, p_nc)
dev.off()
## png
## 2
Supplementary Table S6
edge_ha28 <- igraph::as_data_frame(net_ha28)
vertex_ha28 <- igraph::as_data_frame(net_ha28, "vertices")
edge_ha28$from_genus <- vertex_ha28$Genus[match(edge_ha28[, 1], vertex_ha28$name)]
edge_ha28$to_genus <- vertex_ha28$Genus[match(edge_ha28[, 2], vertex_ha28$name)]
edge_ha28$color <- NULL
edge_ha28 <- dplyr::rename(edge_ha28, `spiec-easi correlation` = weight) %>%
mutate(
from_genus = ifelse(from_genus != "g__", from_genus, NA),
to_genus = ifelse(to_genus != "g__", to_genus, NA),
type = ifelse( `spiec-easi correlation` > 0, "positive", "negative")) %>%
mutate(`intra genus` = ifelse(from_genus == to_genus, "true", "false"))
edge_ha_intra_genus <- filter(edge_ha28, from_genus == to_genus)
edge_ha_inter_genus <- filter(edge_ha28, from_genus != to_genus)
edge_cr28 <- igraph::as_data_frame(net_cr28)
vertex_cr28 <- igraph::as_data_frame(net_cr28, "vertices")
edge_cr28$from_genus <- vertex_cr28$Genus[match(edge_cr28[, 1], vertex_cr28$name)]
edge_cr28$to_genus <- vertex_cr28$Genus[match(edge_cr28[, 2], vertex_cr28$name)]
edge_cr28$color <- NULL
edge_cr28 <- dplyr::rename(edge_cr28, `spiec-easi correlation` = weight) %>%
mutate(
from_genus = ifelse(from_genus != "g__", from_genus, NA),
to_genus = ifelse(to_genus != "g__", to_genus, NA),
type = ifelse( `spiec-easi correlation` > 0, "positive", "negative")) %>%
mutate(`intra genus` = ifelse(from_genus == to_genus, "true", "false"))
write_xlsx(list(`HA network` = edge_ha28, `CR network` = edge_cr28),
path = "output/table/table_s6.xlsx")
We then compared the degrees of OTUs in the four significantly different genera Blautia, Oscillospira, Lactobacillus, Allobaculum. The degree of Lactobacillus in HA network was significantly lower than in CR network (P = 0.0396, Wilcoxon sum test, FDR corrected), while degrees of OTUs in other three genera were similar in the two networks (Supplementary Fig. S5).
degree_cr28 <- igraph::degree(net_cr28, normalized = TRUE)
degree_ha28 <- igraph::degree(net_ha28, normalized = TRUE)
degree_df <- tibble(
degree = c(degree_cr28, degree_ha28),
genus = c(vertex_cr28$Genus, vertex_ha28$Genus),
state = rep(c("CR", 'HA'), each = length(degree_cr28))
)
marker_genus <- filter(degree_df, genus %in%
c("g__Blautia", "g__Oscillospira", "g__Lactobacillus", "g__Allobaculum")
)
p_s5 <- ggplot(marker_genus, aes(state, degree, fill = state)) +
geom_boxplot() +
facet_wrap(~genus, ncol = 4) +
ggpubr::stat_compare_means(
comparisons = list(c("CR", "HA")), label = "p.signif") +
theme_bw() +
theme(legend.position = "none") +
scale_fill_manual(values = npg_colors_2) +
labs(x = NULL, y = "Degree")
p_s5
Figure S5. The degrees of OTUs in the four significant different genera of inferred ecological networks. P values are from Wilcoxon rank sum test. P value: * P < 0.05; ns, no significance P > 0.05.
ggsave("output/figure/figure_s5.pdf", p_s5, width = 7, height = 4)
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/openblas-base/libblas.so.3
## LAPACK: /usr/lib/libopenblasp-r0.2.18.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid parallel stats4 tools stats graphics grDevices
## [8] utils datasets methods base
##
## other attached packages:
## [1] writexl_1.1 furrr_0.1.0
## [3] future_1.14.0 grImport2_0.1-5
## [5] rsvg_1.3 igraph_1.2.4.1
## [7] SpiecEasi_1.0.6 ggsignif_0.5.0
## [9] ggfortify_0.4.7 circlize_0.4.6
## [11] treeio_1.8.1 ggtree_1.16.3
## [13] ComplexHeatmap_2.0.0 DESeq2_1.24.0
## [15] SummarizedExperiment_1.14.0 DelayedArray_0.10.0
## [17] BiocParallel_1.18.0 matrixStats_0.54.0
## [19] Biobase_2.44.0 GenomicRanges_1.36.0
## [21] GenomeInfoDb_1.20.0 IRanges_2.18.1
## [23] S4Vectors_0.22.0 BiocGenerics_0.30.0
## [25] hrbrthemes_0.7.1 vegan_2.5-5
## [27] lattice_0.20-38 permute_0.9-5
## [29] patchwork_0.0.1 ggpubr_0.2.1
## [31] magrittr_1.5 eulerr_5.1.0
## [33] microbiome_1.6.0 phyloseq_1.28.0
## [35] ggsci_2.9 forcats_0.4.0
## [37] stringr_1.4.0 dplyr_0.8.3
## [39] purrr_0.3.2 readr_1.3.1
## [41] tidyr_1.0.0 tibble_2.1.3
## [43] ggplot2_3.2.1 tidyverse_1.2.1
## [45] knitr_1.23
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.1.4 Hmisc_4.2-0
## [4] VGAM_1.1-1 plyr_1.8.4 lazyeval_0.2.2
## [7] polylabelr_0.1.0 splines_3.6.0 listenv_0.7.0
## [10] digest_0.6.20 foreach_1.4.7 htmltools_0.3.6
## [13] checkmate_1.9.4 memoise_1.1.0 cluster_2.0.8
## [16] globals_0.12.4 Biostrings_2.52.0 annotate_1.62.0
## [19] modelr_0.1.4 extrafont_0.17 extrafontdb_1.0
## [22] jpeg_0.1-8 colorspace_1.4-1 blob_1.2.0
## [25] rvest_0.3.4 haven_2.1.1 xfun_0.8
## [28] crayon_1.3.4 RCurl_1.95-4.12 jsonlite_1.6
## [31] genefilter_1.66.0 zeallot_0.1.0 survival_2.44-1.1
## [34] iterators_1.0.12 ape_5.3 glue_1.3.1
## [37] polyclip_1.10-0 gtable_0.3.0 zlibbioc_1.30.0
## [40] XVector_0.24.0 GetoptLong_0.1.7 Rhdf5lib_1.6.0
## [43] Rttf2pt1_1.3.7 shape_1.4.4 scales_1.0.0
## [46] DBI_1.0.0 Rcpp_1.0.2 xtable_1.8-4
## [49] htmlTable_1.13.1 clue_0.3-57 tidytree_0.2.5
## [52] foreign_0.8-71 bit_1.1-14 Formula_1.2-3
## [55] htmlwidgets_1.3 httr_1.4.0 pulsar_0.3.5
## [58] RColorBrewer_1.1-2 ellipsis_0.2.0.1 acepack_1.4.1
## [61] pkgconfig_2.0.2 XML_3.98-1.20 nnet_7.3-12
## [64] locfit_1.5-9.1 labeling_0.3 tidyselect_0.2.5
## [67] rlang_0.4.0 reshape2_1.4.3 AnnotationDbi_1.46.0
## [70] munsell_0.5.0 cellranger_1.1.0 cli_1.1.0
## [73] generics_0.0.2 RSQLite_2.1.2 ade4_1.7-13
## [76] broom_0.5.2 evaluate_0.14 biomformat_1.12.0
## [79] yaml_2.2.0 bit64_0.9-7 nlme_3.1-139
## [82] xml2_1.2.0 compiler_3.6.0 rstudioapi_0.10
## [85] png_0.1-7 huge_1.3.2 geneplotter_1.62.0
## [88] stringi_1.4.3 highr_0.8 gdtools_0.1.9
## [91] Matrix_1.2-17 multtest_2.40.0 vctrs_0.2.0
## [94] pillar_1.4.2 lifecycle_0.1.0 GlobalOptions_0.1.0
## [97] cowplot_1.0.0 data.table_1.12.2 bitops_1.0-6
## [100] R6_2.4.0 latticeExtra_0.6-28 gridExtra_2.3
## [103] codetools_0.2-16 MASS_7.3-51.1 assertthat_0.2.1
## [106] rhdf5_2.28.0 rjson_0.2.20 withr_2.1.2
## [109] GenomeInfoDbData_1.2.1 mgcv_1.8-28 hms_0.5.0
## [112] rpart_4.1-15 rvcheck_0.1.3 rmarkdown_1.14
## [115] Rtsne_0.15 lubridate_1.7.4 base64enc_0.1-3